{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 1: Introduction to Tensors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1 2 3 4 5]\n",
      "[ 2  4  6  8 10]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "class Tensor (object):\n",
    "    \n",
    "    def __init__(self, data):\n",
    "        self.data = np.array(data)\n",
    "    \n",
    "    def __add__(self, other):\n",
    "        return Tensor(self.data + other.data)\n",
    "    \n",
    "    def __repr__(self):\n",
    "        return str(self.data.__repr__())\n",
    "    \n",
    "    def __str__(self):\n",
    "        return str(self.data.__str__())\n",
    "    \n",
    "x = Tensor([1,2,3,4,5])\n",
    "print(x)\n",
    "\n",
    "y = x + x\n",
    "print(y)"
   ]
  },
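  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick sanity check (a sketch, not part of the original text): `Tensor` simply wraps a NumPy array, so `+` is element-wise and the result is itself a `Tensor`. The variable names below are just for illustration."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sketch: Tensor wraps an ndarray; + delegates to NumPy's element-wise add\n",
    "a = Tensor([1, 2, 3])\n",
    "b = Tensor([10, 20, 30])\n",
    "c = a + b\n",
    "print(c)               # [11 22 33]\n",
    "print(type(c.data))    # <class 'numpy.ndarray'>"
   ]
  },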
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 2: Introduction to Autograd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "class Tensor (object):\n",
    "    \n",
    "    def __init__(self, data, creators=None, creation_op = None):\n",
    "        self.data = np.array(data)\n",
    "        self.creation_op = creation_op\n",
    "        self.creators = creators\n",
    "        self.grad = None\n",
    "    \n",
    "    def backward(self, grad):\n",
    "        self.grad = grad\n",
    "        \n",
    "        if(self.creation_op == \"add\"):\n",
    "            self.creators[0].backward(grad)\n",
    "            self.creators[1].backward(grad)\n",
    "\n",
    "    def __add__(self, other):\n",
    "        return Tensor(self.data + other.data,  creators=[self,other], creation_op=\"add\")\n",
    "    \n",
    "    def __repr__(self):\n",
    "        return str(self.data.__repr__())\n",
    "    \n",
    "    def __str__(self):\n",
    "        return str(self.data.__str__())\n",
    "    \n",
    "x = Tensor([1,2,3,4,5])\n",
    "y = Tensor([2,2,2,2,2])\n",
    "\n",
    "z = x + y\n",
    "z.backward(Tensor(np.array([1,1,1,1,1])))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1 1 1 1 1]\n",
      "[1 1 1 1 1]\n",
      "[array([1, 2, 3, 4, 5]), array([2, 2, 2, 2, 2])]\n",
      "add\n"
     ]
    }
   ],
   "source": [
    "print(x.grad)\n",
    "print(y.grad)\n",
    "print(z.creators)\n",
    "print(z.creation_op)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1 1 1 1 1]\n"
     ]
    }
   ],
   "source": [
    "a = Tensor([1,2,3,4,5])\n",
    "b = Tensor([2,2,2,2,2])\n",
    "c = Tensor([5,4,3,2,1])\n",
    "d = Tensor([-1,-2,-3,-4,-5])\n",
    "\n",
    "e = a + b\n",
    "f = c + d\n",
    "g = e + f\n",
    "\n",
    "g.backward(Tensor(np.array([1,1,1,1,1])))\n",
    "\n",
    "print(a.grad)"
   ]
  },
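  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note that `backward()` stores a gradient on every node it passes through, not just on the leaves. A quick check (illustrative, reusing `e` and `f` from the cell above):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# intermediate nodes keep the gradient that flowed through them\n",
    "print(e.grad)   # [1 1 1 1 1]\n",
    "print(f.grad)   # [1 1 1 1 1]"
   ]
  },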
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 3: Tensors That Are Used Multiple Times"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([False, False, False, False, False])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = Tensor([1,2,3,4,5])\n",
    "b = Tensor([2,2,2,2,2])\n",
    "c = Tensor([5,4,3,2,1])\n",
    "\n",
    "d = a + b\n",
    "e = b + c\n",
    "f = d + e\n",
    "f.backward(Tensor(np.array([1,1,1,1,1])))\n",
    "\n",
    "b.grad.data == np.array([2,2,2,2,2])"
   ]
  },
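  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`b` is used twice (by both `d` and `e`), so its correct gradient is `[2 2 2 2 2]`. But this version of `backward()` overwrites `self.grad` on each call instead of accumulating, so the second gradient replaces the first and the comparison above comes out `False`. The next section fixes this by counting each tensor's children and accumulating their gradients."
   ]
  },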
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 4: Upgrading Autograd to Support Multiple Tensors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ True  True  True  True  True]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "class Tensor (object):\n",
    "    \n",
    "    def __init__(self,data,\n",
    "                 autograd=False,\n",
    "                 creators=None,\n",
    "                 creation_op=None,\n",
    "                 id=None):\n",
    "        \n",
    "        self.data = np.array(data)\n",
    "        self.autograd = autograd\n",
    "        self.grad = None\n",
    "        if(id is None):\n",
    "            self.id = np.random.randint(0,100000)\n",
    "        else:\n",
    "            self.id = id\n",
    "        \n",
    "        self.creators = creators\n",
    "        self.creation_op = creation_op\n",
    "        self.children = {}\n",
    "        \n",
    "        if(creators is not None):\n",
    "            for c in creators:\n",
    "                if(self.id not in c.children):\n",
    "                    c.children[self.id] = 1\n",
    "                else:\n",
    "                    c.children[self.id] += 1\n",
    "\n",
    "    def all_children_grads_accounted_for(self):\n",
    "        for id,cnt in self.children.items():\n",
    "            if(cnt != 0):\n",
    "                return False\n",
    "        return True        \n",
    "        \n",
    "    def backward(self,grad=None, grad_origin=None):\n",
    "        if(self.autograd):\n",
    "            if(grad is None):\n",
    "                grad = FloatTensor(np.ones_like(self.data))\n",
    "            \n",
    "            if(grad_origin is not None):\n",
    "                if(self.children[grad_origin.id] == 0):\n",
    "                    raise Exception(\"cannot backprop more than once\")\n",
    "                else:\n",
    "                    self.children[grad_origin.id] -= 1\n",
    "\n",
    "            if(self.grad is None):\n",
    "                self.grad = grad\n",
    "            else:\n",
    "                self.grad += grad\n",
    "            \n",
    "            # grads must not have grads of their own\n",
    "            assert grad.autograd == False\n",
    "            \n",
    "            # only continue backpropping if there's something to\n",
    "            # backprop into and if all gradients (from children)\n",
    "            # are accounted for override waiting for children if\n",
    "            # \"backprop\" was called on this variable directly\n",
    "            if(self.creators is not None and \n",
    "               (self.all_children_grads_accounted_for() or \n",
    "                grad_origin is None)):\n",
    "\n",
    "                if(self.creation_op == \"add\"):\n",
    "                    self.creators[0].backward(self.grad, self)\n",
    "                    self.creators[1].backward(self.grad, self)\n",
    "                    \n",
    "    def __add__(self, other):\n",
    "        if(self.autograd and other.autograd):\n",
    "            return Tensor(self.data + other.data,\n",
    "                          autograd=True,\n",
    "                          creators=[self,other],\n",
    "                          creation_op=\"add\")\n",
    "        return Tensor(self.data + other.data)\n",
    "\n",
    "    def __repr__(self):\n",
    "        return str(self.data.__repr__())\n",
    "    \n",
    "    def __str__(self):\n",
    "        return str(self.data.__str__())  \n",
    "    \n",
    "a = Tensor([1,2,3,4,5], autograd=True)\n",
    "b = Tensor([2,2,2,2,2], autograd=True)\n",
    "c = Tensor([5,4,3,2,1], autograd=True)\n",
    "\n",
    "d = a + b\n",
    "e = b + c\n",
    "f = d + e\n",
    "\n",
    "f.backward(Tensor(np.array([1,1,1,1,1])))\n",
    "\n",
    "print(b.grad.data == np.array([2,2,2,2,2]))"
   ]
  },
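  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The bookkeeping that makes this work is the `children` counter: creating `d = a + b` and `e = b + c` registers two children on `b`, and each `backward()` call decrements the matching counter. A small illustration (a sketch, not from the original):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sketch: watch the children counters on b before and after backward()\n",
    "a = Tensor([1,2,3,4,5], autograd=True)\n",
    "b = Tensor([2,2,2,2,2], autograd=True)\n",
    "c = Tensor([5,4,3,2,1], autograd=True)\n",
    "\n",
    "d = a + b\n",
    "e = b + c\n",
    "print(b.children)   # two child ids (d and e), each with count 1\n",
    "\n",
    "f = d + e\n",
    "f.backward(Tensor(np.array([1,1,1,1,1])))\n",
    "print(b.children)   # same ids, both counters decremented to 0"
   ]
  },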
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 5: Add Support for Negation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ True  True  True  True  True]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "class Tensor (object):\n",
    "    \n",
    "    def __init__(self,data,\n",
    "                 autograd=False,\n",
    "                 creators=None,\n",
    "                 creation_op=None,\n",
    "                 id=None):\n",
    "        \n",
    "        self.data = np.array(data)\n",
    "        self.autograd = autograd\n",
    "        self.grad = None\n",
    "        if(id is None):\n",
    "            self.id = np.random.randint(0,100000)\n",
    "        else:\n",
    "            self.id = id\n",
    "        \n",
    "        self.creators = creators\n",
    "        self.creation_op = creation_op\n",
    "        self.children = {}\n",
    "        \n",
    "        if(creators is not None):\n",
    "            for c in creators:\n",
    "                if(self.id not in c.children):\n",
    "                    c.children[self.id] = 1\n",
    "                else:\n",
    "                    c.children[self.id] += 1\n",
    "\n",
    "    def all_children_grads_accounted_for(self):\n",
    "        for id,cnt in self.children.items():\n",
    "            if(cnt != 0):\n",
    "                return False\n",
    "        return True        \n",
    "        \n",
    "    def backward(self,grad=None, grad_origin=None):\n",
    "        if(self.autograd):\n",
    "            if(grad is None):\n",
    "                grad = FloatTensor(np.ones_like(self.data))\n",
    "            \n",
    "            if(grad_origin is not None):\n",
    "                if(self.children[grad_origin.id] == 0):\n",
    "                    raise Exception(\"cannot backprop more than once\")\n",
    "                else:\n",
    "                    self.children[grad_origin.id] -= 1\n",
    "\n",
    "            if(self.grad is None):\n",
    "                self.grad = grad\n",
    "            else:\n",
    "                self.grad += grad\n",
    "            \n",
    "            # grads must not have grads of their own\n",
    "            assert grad.autograd == False\n",
    "            \n",
    "            # only continue backpropping if there's something to\n",
    "            # backprop into and if all gradients (from children)\n",
    "            # are accounted for override waiting for children if\n",
    "            # \"backprop\" was called on this variable directly\n",
    "            if(self.creators is not None and \n",
    "               (self.all_children_grads_accounted_for() or \n",
    "                grad_origin is None)):\n",
    "\n",
    "                if(self.creation_op == \"add\"):\n",
    "                    self.creators[0].backward(self.grad, self)\n",
    "                    self.creators[1].backward(self.grad, self)\n",
    "                    \n",
    "                if(self.creation_op == \"neg\"):\n",
    "                    self.creators[0].backward(self.grad.__neg__())\n",
    "                    \n",
    "    def __add__(self, other):\n",
    "        if(self.autograd and other.autograd):\n",
    "            return Tensor(self.data + other.data,\n",
    "                          autograd=True,\n",
    "                          creators=[self,other],\n",
    "                          creation_op=\"add\")\n",
    "        return Tensor(self.data + other.data)\n",
    "\n",
    "    def __neg__(self):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data * -1,\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"neg\")\n",
    "        return Tensor(self.data * -1) \n",
    "    \n",
    "    def __repr__(self):\n",
    "        return str(self.data.__repr__())\n",
    "    \n",
    "    def __str__(self):\n",
    "        return str(self.data.__str__())  \n",
    "    \n",
    "a = Tensor([1,2,3,4,5], autograd=True)\n",
    "b = Tensor([2,2,2,2,2], autograd=True)\n",
    "c = Tensor([5,4,3,2,1], autograd=True)\n",
    "\n",
    "d = a + (-b)\n",
    "e = (-b) + c\n",
    "f = d + e\n",
    "\n",
    "f.backward(Tensor(np.array([1,1,1,1,1])))\n",
    "\n",
    "print(b.grad.data == np.array([-2,-2,-2,-2,-2]))"
   ]
  },
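  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In isolation, the negation gradient just flips the sign of whatever gradient comes in. A minimal check (illustrative):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sketch: for y = -x, dy/dx = -1, so the gradient flips sign\n",
    "x = Tensor([1,2,3], autograd=True)\n",
    "y = -x\n",
    "y.backward(Tensor(np.array([1,1,1])))\n",
    "print(x.grad)   # [-1 -1 -1]"
   ]
  },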
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 6: Add Support for Additional Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ True  True  True  True  True]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "class Tensor (object):\n",
    "    \n",
    "    def __init__(self,data,\n",
    "                 autograd=False,\n",
    "                 creators=None,\n",
    "                 creation_op=None,\n",
    "                 id=None):\n",
    "        \n",
    "        self.data = np.array(data)\n",
    "        self.autograd = autograd\n",
    "        self.grad = None\n",
    "        if(id is None):\n",
    "            self.id = np.random.randint(0,100000)\n",
    "        else:\n",
    "            self.id = id\n",
    "        \n",
    "        self.creators = creators\n",
    "        self.creation_op = creation_op\n",
    "        self.children = {}\n",
    "        \n",
    "        if(creators is not None):\n",
    "            for c in creators:\n",
    "                if(self.id not in c.children):\n",
    "                    c.children[self.id] = 1\n",
    "                else:\n",
    "                    c.children[self.id] += 1\n",
    "\n",
    "    def all_children_grads_accounted_for(self):\n",
    "        for id,cnt in self.children.items():\n",
    "            if(cnt != 0):\n",
    "                return False\n",
    "        return True \n",
    "        \n",
    "    def backward(self,grad=None, grad_origin=None):\n",
    "        if(self.autograd):\n",
    " \n",
    "            if(grad is None):\n",
    "                grad = Tensor(np.ones_like(self.data))\n",
    "\n",
    "            if(grad_origin is not None):\n",
    "                if(self.children[grad_origin.id] == 0):\n",
    "                    raise Exception(\"cannot backprop more than once\")\n",
    "                else:\n",
    "                    self.children[grad_origin.id] -= 1\n",
    "\n",
    "            if(self.grad is None):\n",
    "                self.grad = grad\n",
    "            else:\n",
    "                self.grad += grad\n",
    "            \n",
    "            # grads must not have grads of their own\n",
    "            assert grad.autograd == False\n",
    "            \n",
    "            # only continue backpropping if there's something to\n",
    "            # backprop into and if all gradients (from children)\n",
    "            # are accounted for override waiting for children if\n",
    "            # \"backprop\" was called on this variable directly\n",
    "            if(self.creators is not None and \n",
    "               (self.all_children_grads_accounted_for() or \n",
    "                grad_origin is None)):\n",
    "\n",
    "                if(self.creation_op == \"add\"):\n",
    "                    self.creators[0].backward(self.grad, self)\n",
    "                    self.creators[1].backward(self.grad, self)\n",
    "                    \n",
    "                if(self.creation_op == \"sub\"):\n",
    "                    self.creators[0].backward(Tensor(self.grad.data), self)\n",
    "                    self.creators[1].backward(Tensor(self.grad.__neg__().data), self)\n",
    "\n",
    "                if(self.creation_op == \"mul\"):\n",
    "                    new = self.grad * self.creators[1]\n",
    "                    self.creators[0].backward(new , self)\n",
    "                    new = self.grad * self.creators[0]\n",
    "                    self.creators[1].backward(new, self)                    \n",
    "                    \n",
    "                if(self.creation_op == \"mm\"):\n",
    "                    c0 = self.creators[0]\n",
    "                    c1 = self.creators[1]\n",
    "                    new = self.grad.mm(c1.transpose())\n",
    "                    c0.backward(new)\n",
    "                    new = self.grad.transpose().mm(c0).transpose()\n",
    "                    c1.backward(new)\n",
    "                    \n",
    "                if(self.creation_op == \"transpose\"):\n",
    "                    self.creators[0].backward(self.grad.transpose())\n",
    "\n",
    "                if(\"sum\" in self.creation_op):\n",
    "                    dim = int(self.creation_op.split(\"_\")[1])\n",
    "                    self.creators[0].backward(self.grad.expand(dim,\n",
    "                                                               self.creators[0].data.shape[dim]))\n",
    "\n",
    "                if(\"expand\" in self.creation_op):\n",
    "                    dim = int(self.creation_op.split(\"_\")[1])\n",
    "                    self.creators[0].backward(self.grad.sum(dim))\n",
    "                    \n",
    "                if(self.creation_op == \"neg\"):\n",
    "                    self.creators[0].backward(self.grad.__neg__())\n",
    "                    \n",
    "    def __add__(self, other):\n",
    "        if(self.autograd and other.autograd):\n",
    "            return Tensor(self.data + other.data,\n",
    "                          autograd=True,\n",
    "                          creators=[self,other],\n",
    "                          creation_op=\"add\")\n",
    "        return Tensor(self.data + other.data)\n",
    "\n",
    "    def __neg__(self):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data * -1,\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"neg\")\n",
    "        return Tensor(self.data * -1)\n",
    "    \n",
    "    def __sub__(self, other):\n",
    "        if(self.autograd and other.autograd):\n",
    "            return Tensor(self.data - other.data,\n",
    "                          autograd=True,\n",
    "                          creators=[self,other],\n",
    "                          creation_op=\"sub\")\n",
    "        return Tensor(self.data - other.data)\n",
    "    \n",
    "    def __mul__(self, other):\n",
    "        if(self.autograd and other.autograd):\n",
    "            return Tensor(self.data * other.data,\n",
    "                          autograd=True,\n",
    "                          creators=[self,other],\n",
    "                          creation_op=\"mul\")\n",
    "        return Tensor(self.data * other.data)    \n",
    "\n",
    "    def sum(self, dim):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data.sum(dim),\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"sum_\"+str(dim))\n",
    "        return Tensor(self.data.sum(dim))\n",
    "    \n",
    "    def expand(self, dim,copies):\n",
    "\n",
    "        trans_cmd = list(range(0,len(self.data.shape)))\n",
    "        trans_cmd.insert(dim,len(self.data.shape))\n",
    "        new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)\n",
    "        \n",
    "        if(self.autograd):\n",
    "            return Tensor(new_data,\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"expand_\"+str(dim))\n",
    "        return Tensor(new_data)\n",
    "    \n",
    "    def transpose(self):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data.transpose(),\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"transpose\")\n",
    "        \n",
    "        return Tensor(self.data.transpose())\n",
    "    \n",
    "    def mm(self, x):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data.dot(x.data),\n",
    "                          autograd=True,\n",
    "                          creators=[self,x],\n",
    "                          creation_op=\"mm\")\n",
    "        return Tensor(self.data.dot(x.data))\n",
    "    \n",
    "    def __repr__(self):\n",
    "        return str(self.data.__repr__())\n",
    "    \n",
    "    def __str__(self):\n",
    "        return str(self.data.__str__())  \n",
    "    \n",
    "a = Tensor([1,2,3,4,5], autograd=True)\n",
    "b = Tensor([2,2,2,2,2], autograd=True)\n",
    "c = Tensor([5,4,3,2,1], autograd=True)\n",
    "\n",
    "d = a + b\n",
    "e = b + c\n",
    "f = d + e\n",
    "\n",
    "f.backward(Tensor(np.array([1,1,1,1,1])))\n",
    "\n",
    "print(b.grad.data == np.array([2,2,2,2,2]))"
   ]
  },
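  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick check of the multiplication gradient (a sketch, not from the original): for `f = a * b`, the gradient with respect to `a` is the incoming gradient times `b`, and vice versa."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sketch: element-wise product rule -- df/da = b, df/db = a\n",
    "a = Tensor([1,2,3], autograd=True)\n",
    "b = Tensor([4,5,6], autograd=True)\n",
    "f = a * b\n",
    "f.backward(Tensor(np.array([1,1,1])))\n",
    "print(a.grad)   # [4 5 6]\n",
    "print(b.grad)   # [1 2 3]"
   ]
  },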
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# A few Notes on Sum and Expand"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = Tensor(np.array([[1,2,3],\n",
    "                     [4,5,6]]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([5, 7, 9])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.sum(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 6, 15])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.sum(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[[1, 1, 1, 1],\n",
       "        [2, 2, 2, 2],\n",
       "        [3, 3, 3, 3]],\n",
       "\n",
       "       [[4, 4, 4, 4],\n",
       "        [5, 5, 5, 5],\n",
       "        [6, 6, 6, 6]]])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.expand(dim=2, copies=4)"
   ]
  },
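  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`sum` and `expand` are each other's backward pass: summing along a dimension on the way forward means expanding (copying) the gradient back along that dimension, and expanding forward means summing the gradient back. A quick check (illustrative; `y` and `z` are new names):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sketch: the gradient of sum(0) expands back to the input's shape\n",
    "y = Tensor(np.array([[1,2,3],\n",
    "                     [4,5,6]]), autograd=True)\n",
    "z = y.sum(0)\n",
    "z.backward(Tensor(np.array([1,1,1])))\n",
    "print(y.grad)   # ones, expanded back to shape (2,3)"
   ]
  },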
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 7: Use Autograd to Train a Neural Network"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Previously we would train a model like this"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5.066439994622395\n",
      "0.4959907791902342\n",
      "0.4180671892167177\n",
      "0.35298133007809646\n",
      "0.2972549636567377\n",
      "0.2492326038163328\n",
      "0.20785392075862477\n",
      "0.17231260916265176\n",
      "0.14193744536652986\n",
      "0.11613979792168384\n"
     ]
    }
   ],
   "source": [
    "import numpy\n",
    "np.random.seed(0)\n",
    "\n",
    "data = np.array([[0,0],[0,1],[1,0],[1,1]])\n",
    "target = np.array([[0],[1],[0],[1]])\n",
    "\n",
    "weights_0_1 = np.random.rand(2,3)\n",
    "weights_1_2 = np.random.rand(3,1)\n",
    "\n",
    "for i in range(10):\n",
    "    \n",
    "    # Predict\n",
    "    layer_1 = data.dot(weights_0_1)\n",
    "    layer_2 = layer_1.dot(weights_1_2)\n",
    "    \n",
    "    # Compare\n",
    "    diff = (layer_2 - target)\n",
    "    sqdiff = (diff * diff)\n",
    "    loss = sqdiff.sum(0) # mean squared error loss\n",
    "\n",
    "    # Learn: this is the backpropagation piece\n",
    "    layer_1_grad = diff.dot(weights_1_2.transpose())\n",
    "    weight_1_2_update = layer_1.transpose().dot(diff)\n",
    "    weight_0_1_update = data.transpose().dot(layer_1_grad)\n",
    "    \n",
    "    weights_1_2 -= weight_1_2_update * 0.1\n",
    "    weights_0_1 -= weight_0_1_update * 0.1\n",
    "    print(loss[0])"
   ]
  },
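  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### With autograd, the entire backpropagation block collapses into a single call to loss.backward()"
   ]
  },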
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.58128304]\n",
      "[0.48988149]\n",
      "[0.41375111]\n",
      "[0.34489412]\n",
      "[0.28210124]\n",
      "[0.2254484]\n",
      "[0.17538853]\n",
      "[0.1324231]\n",
      "[0.09682769]\n",
      "[0.06849361]\n"
     ]
    }
   ],
   "source": [
    "import numpy\n",
    "np.random.seed(0)\n",
    "\n",
    "data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)\n",
    "target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)\n",
    "\n",
    "w = list()\n",
    "w.append(Tensor(np.random.rand(2,3), autograd=True))\n",
    "w.append(Tensor(np.random.rand(3,1), autograd=True))\n",
    "\n",
    "for i in range(10):\n",
    "\n",
    "    # Predict\n",
    "    pred = data.mm(w[0]).mm(w[1])\n",
    "    \n",
    "    # Compare\n",
    "    loss = ((pred - target)*(pred - target)).sum(0)\n",
    "    \n",
    "    # Learn\n",
    "    loss.backward(Tensor(np.ones_like(loss.data)))\n",
    "\n",
    "    for w_ in w:\n",
    "        w_.data -= w_.grad.data * 0.1\n",
    "        w_.grad.data *= 0\n",
    "\n",
    "    print(loss)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 8: Adding Automatic Optimization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SGD(object):\n",
    "    \n",
    "    def __init__(self, parameters, alpha=0.1):\n",
    "        self.parameters = parameters\n",
    "        self.alpha = alpha\n",
    "    \n",
    "    def zero(self):\n",
    "        for p in self.parameters:\n",
    "            p.grad.data *= 0\n",
    "        \n",
    "    def step(self, zero=True):\n",
    "        \n",
    "        for p in self.parameters:\n",
    "            \n",
    "            p.data -= p.grad.data * self.alpha\n",
    "            \n",
    "            if(zero):\n",
    "                p.grad.data *= 0"
   ]
  },
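  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note that `step()` both applies the update and (by default) zeroes each parameter's gradient, so the next `backward()` pass starts from a clean slate; pass `zero=False` to keep the gradients around."
   ]
  },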
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.58128304]\n",
      "[0.48988149]\n",
      "[0.41375111]\n",
      "[0.34489412]\n",
      "[0.28210124]\n",
      "[0.2254484]\n",
      "[0.17538853]\n",
      "[0.1324231]\n",
      "[0.09682769]\n",
      "[0.06849361]\n"
     ]
    }
   ],
   "source": [
    "import numpy\n",
    "np.random.seed(0)\n",
    "\n",
    "data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)\n",
    "target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)\n",
    "\n",
    "w = list()\n",
    "w.append(Tensor(np.random.rand(2,3), autograd=True))\n",
    "w.append(Tensor(np.random.rand(3,1), autograd=True))\n",
    "\n",
    "optim = SGD(parameters=w, alpha=0.1)\n",
    "\n",
    "for i in range(10):\n",
    "\n",
    "    # Predict\n",
    "    pred = data.mm(w[0]).mm(w[1])\n",
    "    \n",
    "    # Compare\n",
    "    loss = ((pred - target)*(pred - target)).sum(0)\n",
    "    \n",
    "    # Learn\n",
    "    loss.backward(Tensor(np.ones_like(loss.data)))\n",
    "    optim.step()\n",
    "\n",
    "    print(loss)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 9: Adding Support for Layer Types"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Layer(object):\n",
    "    \n",
    "    def __init__(self):\n",
    "        self.parameters = list()\n",
    "        \n",
    "    def get_parameters(self):\n",
    "        return self.parameters\n",
    "\n",
    "\n",
    "class Linear(Layer):\n",
    "\n",
    "    def __init__(self, n_inputs, n_outputs):\n",
    "        super().__init__()\n",
    "        W = np.random.randn(n_inputs, n_outputs) * np.sqrt(2.0/(n_inputs))\n",
    "        self.weight = Tensor(W, autograd=True)\n",
    "        self.bias = Tensor(np.zeros(n_outputs), autograd=True)\n",
    "        \n",
    "        self.parameters.append(self.weight)\n",
    "        self.parameters.append(self.bias)\n",
    "\n",
    "    def forward(self, input):\n",
    "        return input.mm(self.weight)+self.bias.expand(0,len(input.data))\n",
    "\n",
    "\n"
   ]
  },
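  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick shape check (a sketch, not from the original): a `Linear(2,3)` layer maps a batch of shape `(4, 2)` to `(4, 3)`, with the bias expanded across the batch dimension."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sketch: verify the forward shape of a Linear layer\n",
    "layer = Linear(2, 3)\n",
    "out = layer.forward(Tensor(np.random.rand(4, 2), autograd=True))\n",
    "print(out.data.shape)   # (4, 3)"
   ]
  },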
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 10: Layers Which Contain Layers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2.33428272]\n",
      "[0.06743796]\n",
      "[0.0521849]\n",
      "[0.04079507]\n",
      "[0.03184365]\n",
      "[0.02479336]\n",
      "[0.01925443]\n",
      "[0.01491699]\n",
      "[0.01153118]\n",
      "[0.00889602]\n"
     ]
    }
   ],
   "source": [
    "\n",
    "class Sequential(Layer):\n",
    "    \n",
    "    def __init__(self, layers=list()):\n",
    "        super().__init__()\n",
    "        \n",
    "        self.layers = layers\n",
    "    \n",
    "    def add(self, layer):\n",
    "        self.layers.append(layer)\n",
    "        \n",
    "    def forward(self, input):\n",
    "        for layer in self.layers:\n",
    "            input = layer.forward(input)\n",
    "        return input\n",
    "    \n",
    "    def get_parameters(self):\n",
    "        params = list()\n",
    "        for l in self.layers:\n",
    "            params += l.get_parameters()\n",
    "        return params\n",
    "    \n",
    "import numpy\n",
    "np.random.seed(0)\n",
    "\n",
    "data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)\n",
    "target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)\n",
    "\n",
    "model = Sequential([Linear(2,3), Linear(3,1)])\n",
    "\n",
    "optim = SGD(parameters=model.get_parameters(), alpha=0.05)\n",
    "\n",
    "for i in range(10):\n",
    "    \n",
    "    # Predict\n",
    "    pred = model.forward(data)\n",
    "    \n",
    "    # Compare\n",
    "    loss = ((pred - target)*(pred - target)).sum(0)\n",
    "    \n",
    "    # Learn\n",
    "    loss.backward(Tensor(np.ones_like(loss.data)))\n",
    "    optim.step()\n",
    "    print(loss)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 11: Loss Function Layers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2.33428272]\n",
      "[0.06743796]\n",
      "[0.0521849]\n",
      "[0.04079507]\n",
      "[0.03184365]\n",
      "[0.02479336]\n",
      "[0.01925443]\n",
      "[0.01491699]\n",
      "[0.01153118]\n",
      "[0.00889602]\n"
     ]
    }
   ],
   "source": [
    "class MSELoss(Layer):\n",
    "    \n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "    \n",
    "    def forward(self, pred, target):\n",
    "        return ((pred - target)*(pred - target)).sum(0)\n",
    "    \n",
    "import numpy\n",
    "np.random.seed(0)\n",
    "\n",
    "data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)\n",
    "target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)\n",
    "\n",
    "model = Sequential([Linear(2,3), Linear(3,1)])\n",
    "criterion = MSELoss()\n",
    "\n",
    "optim = SGD(parameters=model.get_parameters(), alpha=0.05)\n",
    "\n",
    "for i in range(10):\n",
    "    \n",
    "    # Predict\n",
    "    pred = model.forward(data)\n",
    "    \n",
    "    # Compare\n",
    "    loss = criterion.forward(pred, target)\n",
    "    \n",
    "    # Learn\n",
    "    loss.backward(Tensor(np.ones_like(loss.data)))\n",
    "    optim.step()\n",
    "    print(loss)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 12: Non-linearity Layers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "class Tensor (object):\n",
    "    \n",
    "    def __init__(self,data,\n",
    "                 autograd=False,\n",
    "                 creators=None,\n",
    "                 creation_op=None,\n",
    "                 id=None):\n",
    "        \n",
    "        self.data = np.array(data)\n",
    "        self.autograd = autograd\n",
    "        self.grad = None\n",
    "        if(id is None):\n",
    "            self.id = np.random.randint(0,100000)\n",
    "        else:\n",
    "            self.id = id\n",
    "        \n",
    "        self.creators = creators\n",
    "        self.creation_op = creation_op\n",
    "        self.children = {}\n",
    "        \n",
    "        if(creators is not None):\n",
    "            for c in creators:\n",
    "                if(self.id not in c.children):\n",
    "                    c.children[self.id] = 1\n",
    "                else:\n",
    "                    c.children[self.id] += 1\n",
    "\n",
    "    def all_children_grads_accounted_for(self):\n",
    "        for id,cnt in self.children.items():\n",
    "            if(cnt != 0):\n",
    "                return False\n",
    "        return True \n",
    "        \n",
    "    def backward(self,grad=None, grad_origin=None):\n",
    "        if(self.autograd):\n",
    " \n",
    "            if(grad is None):\n",
    "                grad = Tensor(np.ones_like(self.data))\n",
    "\n",
    "            if(grad_origin is not None):\n",
    "                if(self.children[grad_origin.id] == 0):\n",
    "                    raise Exception(\"cannot backprop more than once\")\n",
    "                else:\n",
    "                    self.children[grad_origin.id] -= 1\n",
    "\n",
    "            if(self.grad is None):\n",
    "                self.grad = grad\n",
    "            else:\n",
    "                self.grad += grad\n",
    "            \n",
    "            # grads must not have grads of their own\n",
    "            assert grad.autograd == False\n",
    "            \n",
    "            # only continue backpropping if there's something to\n",
    "            # backprop into and if all gradients (from children)\n",
    "            # are accounted for override waiting for children if\n",
    "            # \"backprop\" was called on this variable directly\n",
    "            if(self.creators is not None and \n",
    "               (self.all_children_grads_accounted_for() or \n",
    "                grad_origin is None)):\n",
    "\n",
    "                if(self.creation_op == \"add\"):\n",
    "                    self.creators[0].backward(self.grad, self)\n",
    "                    self.creators[1].backward(self.grad, self)\n",
    "                    \n",
    "                if(self.creation_op == \"sub\"):\n",
    "                    self.creators[0].backward(Tensor(self.grad.data), self)\n",
    "                    self.creators[1].backward(Tensor(self.grad.__neg__().data), self)\n",
    "\n",
    "                if(self.creation_op == \"mul\"):\n",
    "                    new = self.grad * self.creators[1]\n",
    "                    self.creators[0].backward(new , self)\n",
    "                    new = self.grad * self.creators[0]\n",
    "                    self.creators[1].backward(new, self)                    \n",
    "                    \n",
    "                if(self.creation_op == \"mm\"):\n",
    "                    c0 = self.creators[0]\n",
    "                    c1 = self.creators[1]\n",
    "                    new = self.grad.mm(c1.transpose())\n",
    "                    c0.backward(new)\n",
    "                    new = self.grad.transpose().mm(c0).transpose()\n",
    "                    c1.backward(new)\n",
    "                    \n",
    "                if(self.creation_op == \"transpose\"):\n",
    "                    self.creators[0].backward(self.grad.transpose())\n",
    "\n",
    "                if(\"sum\" in self.creation_op):\n",
    "                    dim = int(self.creation_op.split(\"_\")[1])\n",
    "                    self.creators[0].backward(self.grad.expand(dim,\n",
    "                                                               self.creators[0].data.shape[dim]))\n",
    "\n",
    "                if(\"expand\" in self.creation_op):\n",
    "                    dim = int(self.creation_op.split(\"_\")[1])\n",
    "                    self.creators[0].backward(self.grad.sum(dim))\n",
    "                    \n",
    "                if(self.creation_op == \"neg\"):\n",
    "                    self.creators[0].backward(self.grad.__neg__())\n",
    "                    \n",
    "                if(self.creation_op == \"sigmoid\"):\n",
    "                    ones = Tensor(np.ones_like(self.grad.data))\n",
    "                    self.creators[0].backward(self.grad * (self * (ones - self)))\n",
    "                \n",
    "                if(self.creation_op == \"tanh\"):\n",
    "                    ones = Tensor(np.ones_like(self.grad.data))\n",
    "                    self.creators[0].backward(self.grad * (ones - (self * self)))\n",
    "                    \n",
    "    def __add__(self, other):\n",
    "        if(self.autograd and other.autograd):\n",
    "            return Tensor(self.data + other.data,\n",
    "                          autograd=True,\n",
    "                          creators=[self,other],\n",
    "                          creation_op=\"add\")\n",
    "        return Tensor(self.data + other.data)\n",
    "\n",
    "    def __neg__(self):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data * -1,\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"neg\")\n",
    "        return Tensor(self.data * -1)\n",
    "    \n",
    "    def __sub__(self, other):\n",
    "        if(self.autograd and other.autograd):\n",
    "            return Tensor(self.data - other.data,\n",
    "                          autograd=True,\n",
    "                          creators=[self,other],\n",
    "                          creation_op=\"sub\")\n",
    "        return Tensor(self.data - other.data)\n",
    "    \n",
    "    def __mul__(self, other):\n",
    "        if(self.autograd and other.autograd):\n",
    "            return Tensor(self.data * other.data,\n",
    "                          autograd=True,\n",
    "                          creators=[self,other],\n",
    "                          creation_op=\"mul\")\n",
    "        return Tensor(self.data * other.data)    \n",
    "\n",
    "    def sum(self, dim):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data.sum(dim),\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"sum_\"+str(dim))\n",
    "        return Tensor(self.data.sum(dim))\n",
    "    \n",
    "    def expand(self, dim,copies):\n",
    "\n",
    "        trans_cmd = list(range(0,len(self.data.shape)))\n",
    "        trans_cmd.insert(dim,len(self.data.shape))\n",
    "        new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)\n",
    "        \n",
    "        if(self.autograd):\n",
    "            return Tensor(new_data,\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"expand_\"+str(dim))\n",
    "        return Tensor(new_data)\n",
    "    \n",
    "    def transpose(self):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data.transpose(),\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"transpose\")\n",
    "        \n",
    "        return Tensor(self.data.transpose())\n",
    "    \n",
    "    def mm(self, x):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data.dot(x.data),\n",
    "                          autograd=True,\n",
    "                          creators=[self,x],\n",
    "                          creation_op=\"mm\")\n",
    "        return Tensor(self.data.dot(x.data))\n",
    "    \n",
    "    def sigmoid(self):\n",
    "        if(self.autograd):\n",
    "            return Tensor(1 / (1 + np.exp(-self.data)),\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"sigmoid\")\n",
    "        return Tensor(1 / (1 + np.exp(-self.data)))\n",
    "\n",
    "    def tanh(self):\n",
    "        if(self.autograd):\n",
    "            return Tensor(np.tanh(self.data),\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"tanh\")\n",
    "        return Tensor(np.tanh(self.data))\n",
    "        \n",
    "    \n",
    "    def __repr__(self):\n",
    "        return str(self.data.__repr__())\n",
    "    \n",
    "    def __str__(self):\n",
    "        return str(self.data.__str__())  \n",
    "    \n",
    "class Tanh(Layer):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "    \n",
    "    def forward(self, input):\n",
    "        return input.tanh()\n",
    "    \n",
    "class Sigmoid(Layer):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "    \n",
    "    def forward(self, input):\n",
    "        return input.sigmoid()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1.06372865]\n",
      "[0.75148144]\n",
      "[0.57384259]\n",
      "[0.39574294]\n",
      "[0.2482279]\n",
      "[0.15515294]\n",
      "[0.10423398]\n",
      "[0.07571169]\n",
      "[0.05837623]\n",
      "[0.04700013]\n"
     ]
    }
   ],
   "source": [
    "import numpy\n",
    "np.random.seed(0)\n",
    "\n",
    "data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)\n",
    "target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)\n",
    "\n",
    "model = Sequential([Linear(2,3), Tanh(), Linear(3,1), Sigmoid()])\n",
    "criterion = MSELoss()\n",
    "\n",
    "optim = SGD(parameters=model.get_parameters(), alpha=1)\n",
    "\n",
    "for i in range(10):\n",
    "    \n",
    "    # Predict\n",
    "    pred = model.forward(data)\n",
    "    \n",
    "    # Compare\n",
    "    loss = criterion.forward(pred, target)\n",
    "    \n",
    "    # Learn\n",
    "    loss.backward(Tensor(np.ones_like(loss.data)))\n",
    "    optim.step()\n",
    "    print(loss)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 13: The Embedding Layer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Embedding(Layer):\n",
    "    \n",
    "    def __init__(self, vocab_size, dim):\n",
    "        super().__init__()\n",
    "        \n",
    "        self.vocab_size = vocab_size\n",
    "        self.dim = dim\n",
    "        \n",
    "        # this random initialiation style is just a convention from word2vec\n",
    "        self.weight = (np.random.rand(vocab_size, dim) - 0.5) / dim\n",
    "        "
   ]
  },
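  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As written, this layer has no `forward()` yet: selecting rows of `self.weight` by index is not an operation autograd knows how to differentiate. The next section adds `index_select` support so that the embedding lookup can participate in backpropagation."
   ]
  },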
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 14: Add Indexing to Autograd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "class Tensor (object):\n",
    "    \n",
    "    def __init__(self,data,\n",
    "                 autograd=False,\n",
    "                 creators=None,\n",
    "                 creation_op=None,\n",
    "                 id=None):\n",
    "        \n",
    "        self.data = np.array(data)\n",
    "        self.autograd = autograd\n",
    "        self.grad = None\n",
    "        if(id is None):\n",
    "            self.id = np.random.randint(0,100000)\n",
    "        else:\n",
    "            self.id = id\n",
    "        \n",
    "        self.creators = creators\n",
    "        self.creation_op = creation_op\n",
    "        self.children = {}\n",
    "        \n",
    "        if(creators is not None):\n",
    "            for c in creators:\n",
    "                if(self.id not in c.children):\n",
    "                    c.children[self.id] = 1\n",
    "                else:\n",
    "                    c.children[self.id] += 1\n",
    "\n",
    "    def all_children_grads_accounted_for(self):\n",
    "        for id,cnt in self.children.items():\n",
    "            if(cnt != 0):\n",
    "                return False\n",
    "        return True \n",
    "        \n",
    "    def backward(self,grad=None, grad_origin=None):\n",
    "        if(self.autograd):\n",
    " \n",
    "            if(grad is None):\n",
    "                grad = Tensor(np.ones_like(self.data))\n",
    "\n",
    "            if(grad_origin is not None):\n",
    "                if(self.children[grad_origin.id] == 0):\n",
    "                    raise Exception(\"cannot backprop more than once\")\n",
    "                else:\n",
    "                    self.children[grad_origin.id] -= 1\n",
    "\n",
    "            if(self.grad is None):\n",
    "                self.grad = grad\n",
    "            else:\n",
    "                self.grad += grad\n",
    "            \n",
    "            # grads must not have grads of their own\n",
    "            assert grad.autograd == False\n",
    "            \n",
    "            # only continue backpropping if there's something to\n",
    "            # backprop into and if all gradients (from children)\n",
    "            # are accounted for override waiting for children if\n",
    "            # \"backprop\" was called on this variable directly\n",
    "            if(self.creators is not None and \n",
    "               (self.all_children_grads_accounted_for() or \n",
    "                grad_origin is None)):\n",
    "\n",
    "                if(self.creation_op == \"add\"):\n",
    "                    self.creators[0].backward(self.grad, self)\n",
    "                    self.creators[1].backward(self.grad, self)\n",
    "                    \n",
    "                if(self.creation_op == \"sub\"):\n",
    "                    self.creators[0].backward(Tensor(self.grad.data), self)\n",
    "                    self.creators[1].backward(Tensor(self.grad.__neg__().data), self)\n",
    "\n",
    "                if(self.creation_op == \"mul\"):\n",
    "                    new = self.grad * self.creators[1]\n",
    "                    self.creators[0].backward(new , self)\n",
    "                    new = self.grad * self.creators[0]\n",
    "                    self.creators[1].backward(new, self)                    \n",
    "                    \n",
    "                if(self.creation_op == \"mm\"):\n",
    "                    c0 = self.creators[0]\n",
    "                    c1 = self.creators[1]\n",
    "                    new = self.grad.mm(c1.transpose())\n",
    "                    c0.backward(new)\n",
    "                    new = self.grad.transpose().mm(c0).transpose()\n",
    "                    c1.backward(new)\n",
    "                    \n",
    "                if(self.creation_op == \"transpose\"):\n",
    "                    self.creators[0].backward(self.grad.transpose())\n",
    "\n",
    "                if(\"sum\" in self.creation_op):\n",
    "                    dim = int(self.creation_op.split(\"_\")[1])\n",
    "                    self.creators[0].backward(self.grad.expand(dim,\n",
    "                                                               self.creators[0].data.shape[dim]))\n",
    "\n",
    "                if(\"expand\" in self.creation_op):\n",
    "                    dim = int(self.creation_op.split(\"_\")[1])\n",
    "                    self.creators[0].backward(self.grad.sum(dim))\n",
    "                    \n",
    "                if(self.creation_op == \"neg\"):\n",
    "                    self.creators[0].backward(self.grad.__neg__())\n",
    "                    \n",
    "                if(self.creation_op == \"sigmoid\"):\n",
    "                    ones = Tensor(np.ones_like(self.grad.data))\n",
    "                    self.creators[0].backward(self.grad * (self * (ones - self)))\n",
    "                \n",
    "                if(self.creation_op == \"tanh\"):\n",
    "                    ones = Tensor(np.ones_like(self.grad.data))\n",
    "                    self.creators[0].backward(self.grad * (ones - (self * self)))\n",
    "                \n",
    "                if(self.creation_op == \"index_select\"):\n",
    "                    new_grad = np.zeros_like(self.creators[0].data)\n",
    "                    indices_ = self.index_select_indices.data.flatten()\n",
    "                    grad_ = grad.data.reshape(len(indices_), -1)\n",
    "                    for i in range(len(indices_)):\n",
    "                        new_grad[indices_[i]] += grad_[i]\n",
    "                    self.creators[0].backward(Tensor(new_grad))\n",
    "                    \n",
    "    def __add__(self, other):\n",
    "        if(self.autograd and other.autograd):\n",
    "            return Tensor(self.data + other.data,\n",
    "                          autograd=True,\n",
    "                          creators=[self,other],\n",
    "                          creation_op=\"add\")\n",
    "        return Tensor(self.data + other.data)\n",
    "\n",
    "    def __neg__(self):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data * -1,\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"neg\")\n",
    "        return Tensor(self.data * -1)\n",
    "    \n",
    "    def __sub__(self, other):\n",
    "        if(self.autograd and other.autograd):\n",
    "            return Tensor(self.data - other.data,\n",
    "                          autograd=True,\n",
    "                          creators=[self,other],\n",
    "                          creation_op=\"sub\")\n",
    "        return Tensor(self.data - other.data)\n",
    "    \n",
    "    def __mul__(self, other):\n",
    "        if(self.autograd and other.autograd):\n",
    "            return Tensor(self.data * other.data,\n",
    "                          autograd=True,\n",
    "                          creators=[self,other],\n",
    "                          creation_op=\"mul\")\n",
    "        return Tensor(self.data * other.data)    \n",
    "\n",
    "    def sum(self, dim):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data.sum(dim),\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"sum_\"+str(dim))\n",
    "        return Tensor(self.data.sum(dim))\n",
    "    \n",
    "    def expand(self, dim,copies):\n",
    "\n",
    "        trans_cmd = list(range(0,len(self.data.shape)))\n",
    "        trans_cmd.insert(dim,len(self.data.shape))\n",
    "        new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)\n",
    "        \n",
    "        if(self.autograd):\n",
    "            return Tensor(new_data,\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"expand_\"+str(dim))\n",
    "        return Tensor(new_data)\n",
    "    \n",
    "    def transpose(self):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data.transpose(),\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"transpose\")\n",
    "        \n",
    "        return Tensor(self.data.transpose())\n",
    "    \n",
    "    def mm(self, x):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data.dot(x.data),\n",
    "                          autograd=True,\n",
    "                          creators=[self,x],\n",
    "                          creation_op=\"mm\")\n",
    "        return Tensor(self.data.dot(x.data))\n",
    "    \n",
    "    def sigmoid(self):\n",
    "        if(self.autograd):\n",
    "            return Tensor(1 / (1 + np.exp(-self.data)),\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"sigmoid\")\n",
    "        return Tensor(1 / (1 + np.exp(-self.data)))\n",
    "\n",
    "    def tanh(self):\n",
    "        if(self.autograd):\n",
    "            return Tensor(np.tanh(self.data),\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"tanh\")\n",
    "        return Tensor(np.tanh(self.data))\n",
    "    \n",
    "    def index_select(self, indices):\n",
    "\n",
    "        if(self.autograd):\n",
    "            new = Tensor(self.data[indices.data],\n",
    "                         autograd=True,\n",
    "                         creators=[self],\n",
    "                         creation_op=\"index_select\")\n",
    "            new.index_select_indices = indices\n",
    "            return new\n",
    "        return Tensor(self.data[indices.data])\n",
    "    \n",
    "    def __repr__(self):\n",
    "        return str(self.data.__repr__())\n",
    "    \n",
    "    def __str__(self):\n",
    "        return str(self.data.__str__())  \n",
    "    \n",
    "class Tanh(Layer):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "    \n",
    "    def forward(self, input):\n",
    "        return input.tanh()\n",
    "    \n",
    "class Sigmoid(Layer):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "    \n",
    "    def forward(self, input):\n",
    "        return input.sigmoid()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0. 0. 0. 0. 0.]\n",
      " [1. 1. 1. 1. 1.]\n",
      " [2. 2. 2. 2. 2.]\n",
      " [2. 2. 2. 2. 2.]\n",
      " [1. 1. 1. 1. 1.]]\n"
     ]
    }
   ],
   "source": [
    "x = Tensor(np.eye(5), autograd=True)\n",
    "x.index_select(Tensor([[1,2,3],[2,3,4]])).backward()\n",
    "print(x.grad)"
   ]
  },
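  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Each row of `x.grad` counts how many times that row index was selected: row 0 never appears in the indices, rows 1 and 4 appear once, and rows 2 and 3 appear twice, so their gradients accumulate to 2. This is exactly the accumulation an embedding layer needs when the same word occurs more than once in a batch."
   ]
  },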
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 15: The Embedding Layer (revisited)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Embedding(Layer):\n",
    "    \n",
    "    def __init__(self, vocab_size, dim):\n",
    "        super().__init__()\n",
    "        \n",
    "        self.vocab_size = vocab_size\n",
    "        self.dim = dim\n",
    "        \n",
    "        # this random initialiation style is just a convention from word2vec\n",
    "        self.weight = Tensor((np.random.rand(vocab_size, dim) - 0.5) / dim, autograd=True)\n",
    "        \n",
    "        self.parameters.append(self.weight)\n",
    "    \n",
    "    def forward(self, input):\n",
    "        return self.weight.index_select(input)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.98874126]\n",
      "[0.6658868]\n",
      "[0.45639889]\n",
      "[0.31608168]\n",
      "[0.2260925]\n",
      "[0.16877423]\n",
      "[0.13120515]\n",
      "[0.10555487]\n",
      "[0.08731868]\n",
      "[0.07387834]\n"
     ]
    }
   ],
   "source": [
    "import numpy\n",
    "np.random.seed(0)\n",
    "\n",
    "data = Tensor(np.array([1,2,1,2]), autograd=True)\n",
    "target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)\n",
    "\n",
    "embed = Embedding(5,3)\n",
    "model = Sequential([embed, Tanh(), Linear(3,1), Sigmoid()])\n",
    "criterion = MSELoss()\n",
    "\n",
    "optim = SGD(parameters=model.get_parameters(), alpha=0.5)\n",
    "\n",
    "for i in range(10):\n",
    "    \n",
    "    # Predict\n",
    "    pred = model.forward(data)\n",
    "    \n",
    "    # Compare\n",
    "    loss = criterion.forward(pred, target)\n",
    "    \n",
    "    # Learn\n",
    "    loss.backward(Tensor(np.ones_like(loss.data)))\n",
    "    optim.step()\n",
    "    print(loss)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 16: The Cross Entropy Layer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "class Tensor (object):\n",
    "    \n",
    "    def __init__(self,data,\n",
    "                 autograd=False,\n",
    "                 creators=None,\n",
    "                 creation_op=None,\n",
    "                 id=None):\n",
    "        \n",
    "        self.data = np.array(data)\n",
    "        self.autograd = autograd\n",
    "        self.grad = None\n",
    "        if(id is None):\n",
    "            self.id = np.random.randint(0,100000)\n",
    "        else:\n",
    "            self.id = id\n",
    "        \n",
    "        self.creators = creators\n",
    "        self.creation_op = creation_op\n",
    "        self.children = {}\n",
    "        \n",
    "        if(creators is not None):\n",
    "            for c in creators:\n",
    "                if(self.id not in c.children):\n",
    "                    c.children[self.id] = 1\n",
    "                else:\n",
    "                    c.children[self.id] += 1\n",
    "\n",
    "    def all_children_grads_accounted_for(self):\n",
    "        for id,cnt in self.children.items():\n",
    "            if(cnt != 0):\n",
    "                return False\n",
    "        return True \n",
    "        \n",
    "    def backward(self,grad=None, grad_origin=None):\n",
    "        if(self.autograd):\n",
    " \n",
    "            if(grad is None):\n",
    "                grad = Tensor(np.ones_like(self.data))\n",
    "\n",
    "            if(grad_origin is not None):\n",
    "                if(self.children[grad_origin.id] == 0):\n",
    "                    raise Exception(\"cannot backprop more than once\")\n",
    "                else:\n",
    "                    self.children[grad_origin.id] -= 1\n",
    "\n",
    "            if(self.grad is None):\n",
    "                self.grad = grad\n",
    "            else:\n",
    "                self.grad += grad\n",
    "            \n",
    "            # grads must not have grads of their own\n",
    "            assert grad.autograd == False\n",
    "            \n",
    "            # only continue backpropping if there's something to\n",
    "            # backprop into and if all gradients (from children)\n",
    "            # are accounted for override waiting for children if\n",
    "            # \"backprop\" was called on this variable directly\n",
    "            if(self.creators is not None and \n",
    "               (self.all_children_grads_accounted_for() or \n",
    "                grad_origin is None)):\n",
    "\n",
    "                if(self.creation_op == \"add\"):\n",
    "                    self.creators[0].backward(self.grad, self)\n",
    "                    self.creators[1].backward(self.grad, self)\n",
    "                    \n",
    "                if(self.creation_op == \"sub\"):\n",
    "                    self.creators[0].backward(Tensor(self.grad.data), self)\n",
    "                    self.creators[1].backward(Tensor(self.grad.__neg__().data), self)\n",
    "\n",
    "                if(self.creation_op == \"mul\"):\n",
    "                    new = self.grad * self.creators[1]\n",
    "                    self.creators[0].backward(new , self)\n",
    "                    new = self.grad * self.creators[0]\n",
    "                    self.creators[1].backward(new, self)                    \n",
    "                    \n",
    "                if(self.creation_op == \"mm\"):\n",
    "                    c0 = self.creators[0]\n",
    "                    c1 = self.creators[1]\n",
    "                    new = self.grad.mm(c1.transpose())\n",
    "                    c0.backward(new)\n",
    "                    new = self.grad.transpose().mm(c0).transpose()\n",
    "                    c1.backward(new)\n",
    "                    \n",
    "                if(self.creation_op == \"transpose\"):\n",
    "                    self.creators[0].backward(self.grad.transpose())\n",
    "\n",
    "                if(\"sum\" in self.creation_op):\n",
    "                    dim = int(self.creation_op.split(\"_\")[1])\n",
    "                    self.creators[0].backward(self.grad.expand(dim,\n",
    "                                                               self.creators[0].data.shape[dim]))\n",
    "\n",
    "                if(\"expand\" in self.creation_op):\n",
    "                    dim = int(self.creation_op.split(\"_\")[1])\n",
    "                    self.creators[0].backward(self.grad.sum(dim))\n",
    "                    \n",
    "                if(self.creation_op == \"neg\"):\n",
    "                    self.creators[0].backward(self.grad.__neg__())\n",
    "                    \n",
    "                if(self.creation_op == \"sigmoid\"):\n",
    "                    ones = Tensor(np.ones_like(self.grad.data))\n",
    "                    self.creators[0].backward(self.grad * (self * (ones - self)))\n",
    "                \n",
    "                if(self.creation_op == \"tanh\"):\n",
    "                    ones = Tensor(np.ones_like(self.grad.data))\n",
    "                    self.creators[0].backward(self.grad * (ones - (self * self)))\n",
    "                \n",
    "                if(self.creation_op == \"index_select\"):\n",
    "                    new_grad = np.zeros_like(self.creators[0].data)\n",
    "                    indices_ = self.index_select_indices.data.flatten()\n",
    "                    grad_ = grad.data.reshape(len(indices_), -1)\n",
    "                    for i in range(len(indices_)):\n",
    "                        new_grad[indices_[i]] += grad_[i]\n",
    "                    self.creators[0].backward(Tensor(new_grad))\n",
    "                    \n",
    "                if(self.creation_op == \"cross_entropy\"):\n",
    "                    dx = self.softmax_output - self.target_dist\n",
    "                    self.creators[0].backward(Tensor(dx))\n",
    "                    \n",
    "    def __add__(self, other):\n",
    "        if(self.autograd and other.autograd):\n",
    "            return Tensor(self.data + other.data,\n",
    "                          autograd=True,\n",
    "                          creators=[self,other],\n",
    "                          creation_op=\"add\")\n",
    "        return Tensor(self.data + other.data)\n",
    "\n",
    "    def __neg__(self):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data * -1,\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"neg\")\n",
    "        return Tensor(self.data * -1)\n",
    "    \n",
    "    def __sub__(self, other):\n",
    "        if(self.autograd and other.autograd):\n",
    "            return Tensor(self.data - other.data,\n",
    "                          autograd=True,\n",
    "                          creators=[self,other],\n",
    "                          creation_op=\"sub\")\n",
    "        return Tensor(self.data - other.data)\n",
    "    \n",
    "    def __mul__(self, other):\n",
    "        if(self.autograd and other.autograd):\n",
    "            return Tensor(self.data * other.data,\n",
    "                          autograd=True,\n",
    "                          creators=[self,other],\n",
    "                          creation_op=\"mul\")\n",
    "        return Tensor(self.data * other.data)    \n",
    "\n",
    "    def sum(self, dim):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data.sum(dim),\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"sum_\"+str(dim))\n",
    "        return Tensor(self.data.sum(dim))\n",
    "    \n",
    "    def expand(self, dim,copies):\n",
    "\n",
    "        trans_cmd = list(range(0,len(self.data.shape)))\n",
    "        trans_cmd.insert(dim,len(self.data.shape))\n",
    "        new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)\n",
    "        \n",
    "        if(self.autograd):\n",
    "            return Tensor(new_data,\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"expand_\"+str(dim))\n",
    "        return Tensor(new_data)\n",
    "    \n",
    "    def transpose(self):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data.transpose(),\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"transpose\")\n",
    "        \n",
    "        return Tensor(self.data.transpose())\n",
    "    \n",
    "    def mm(self, x):\n",
    "        if(self.autograd):\n",
    "            return Tensor(self.data.dot(x.data),\n",
    "                          autograd=True,\n",
    "                          creators=[self,x],\n",
    "                          creation_op=\"mm\")\n",
    "        return Tensor(self.data.dot(x.data))\n",
    "    \n",
    "    def sigmoid(self):\n",
    "        if(self.autograd):\n",
    "            return Tensor(1 / (1 + np.exp(-self.data)),\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"sigmoid\")\n",
    "        return Tensor(1 / (1 + np.exp(-self.data)))\n",
    "\n",
    "    def tanh(self):\n",
    "        if(self.autograd):\n",
    "            return Tensor(np.tanh(self.data),\n",
    "                          autograd=True,\n",
    "                          creators=[self],\n",
    "                          creation_op=\"tanh\")\n",
    "        return Tensor(np.tanh(self.data))\n",
    "    \n",
    "    def index_select(self, indices):\n",
    "\n",
    "        if(self.autograd):\n",
    "            new = Tensor(self.data[indices.data],\n",
    "                         autograd=True,\n",
    "                         creators=[self],\n",
    "                         creation_op=\"index_select\")\n",
    "            new.index_select_indices = indices\n",
    "            return new\n",
    "        return Tensor(self.data[indices.data])\n",
    "    \n",
    "    def cross_entropy(self, target_indices):\n",
    "\n",
    "        temp = np.exp(self.data)\n",
    "        softmax_output = temp / np.sum(temp,\n",
    "                                       axis=len(self.data.shape)-1,\n",
    "                                       keepdims=True)\n",
    "        \n",
    "        t = target_indices.data.flatten()\n",
    "        p = softmax_output.reshape(len(t),-1)\n",
    "        target_dist = np.eye(p.shape[1])[t]\n",
    "        loss = -(np.log(p) * (target_dist)).sum(1).mean()\n",
    "    \n",
    "        if(self.autograd):\n",
    "            out = Tensor(loss,\n",
    "                         autograd=True,\n",
    "                         creators=[self],\n",
    "                         creation_op=\"cross_entropy\")\n",
    "            out.softmax_output = softmax_output\n",
    "            out.target_dist = target_dist\n",
    "            return out\n",
    "\n",
    "        return Tensor(loss)\n",
    "        \n",
    "    \n",
    "    def __repr__(self):\n",
    "        return str(self.data.__repr__())\n",
    "    \n",
    "    def __str__(self):\n",
    "        return str(self.data.__str__())  \n",
    "    \n",
    "class Tanh(Layer):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "    \n",
    "    def forward(self, input):\n",
    "        return input.tanh()\n",
    "    \n",
    "class Sigmoid(Layer):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "    \n",
    "    def forward(self, input):\n",
    "        return input.sigmoid()"
   ]
  },
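  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Before wrapping `cross_entropy` in a loss layer, it can be hand-checked on a tiny example (a minimal sketch; the numbers are made up): for a single two-class row of logits, the loss should equal the negative log of the softmax probability assigned to the true class."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# hand-check: loss == -log(softmax(logits)[true_class])\n",
    "logits = Tensor(np.array([[2.0, 0.0]]), autograd=True)\n",
    "loss = logits.cross_entropy(Tensor(np.array([0])))\n",
    "manual = -np.log(np.exp(2.0) / (np.exp(2.0) + np.exp(0.0)))\n",
    "print(loss.data, manual)  # both approximately 0.1269"
   ]
  },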
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "class CrossEntropyLoss(object):\n",
    "    \n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "    \n",
    "    def forward(self, input, target):\n",
    "        return input.cross_entropy(target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.3885032434928422\n",
      "0.9558181509266037\n",
      "0.6823083585795604\n",
      "0.5095259967493119\n",
      "0.39574491472895856\n",
      "0.31752527285348264\n",
      "0.2617222861964216\n",
      "0.22061283923954234\n",
      "0.18946427334830068\n",
      "0.16527389263866668\n"
     ]
    }
   ],
   "source": [
    "import numpy\n",
    "np.random.seed(0)\n",
    "\n",
    "# data indices\n",
    "data = Tensor(np.array([1,2,1,2]), autograd=True)\n",
    "\n",
    "# target indices\n",
    "target = Tensor(np.array([0,1,0,1]), autograd=True)\n",
    "\n",
    "model = Sequential([Embedding(3,3), Tanh(), Linear(3,4)])\n",
    "criterion = CrossEntropyLoss()\n",
    "\n",
    "optim = SGD(parameters=model.get_parameters(), alpha=0.1)\n",
    "\n",
    "for i in range(10):\n",
    "    \n",
    "    # Predict\n",
    "    pred = model.forward(data)\n",
    "    \n",
    "    # Compare\n",
    "    loss = criterion.forward(pred, target)\n",
    "    \n",
    "    # Learn\n",
    "    loss.backward(Tensor(np.ones_like(loss.data)))\n",
    "    optim.step()\n",
    "    print(loss)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 17: The Recurrent Neural Network Layer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 400,
   "metadata": {},
   "outputs": [],
   "source": [
    "class RNNCell(Layer):\n",
    "    \n",
    "    def __init__(self, n_inputs, n_hidden, n_output, activation='sigmoid'):\n",
    "        super().__init__()\n",
    "\n",
    "        self.n_inputs = n_inputs\n",
    "        self.n_hidden = n_hidden\n",
    "        self.n_output = n_output\n",
    "        \n",
    "        if(activation == 'sigmoid'):\n",
    "            self.activation = Sigmoid()\n",
    "        elif(activation == 'tanh'):\n",
    "            self.activation == Tanh()\n",
    "        else:\n",
    "            raise Exception(\"Non-linearity not found\")\n",
    "\n",
    "        self.w_ih = Linear(n_inputs, n_hidden)\n",
    "        self.w_hh = Linear(n_hidden, n_hidden)\n",
    "        self.w_ho = Linear(n_hidden, n_output)\n",
    "        \n",
    "        self.parameters += self.w_ih.get_parameters()\n",
    "        self.parameters += self.w_hh.get_parameters()\n",
    "        self.parameters += self.w_ho.get_parameters()        \n",
    "    \n",
    "    def forward(self, input, hidden):\n",
    "        from_prev_hidden = self.w_hh.forward(hidden)\n",
    "        combined = self.w_ih.forward(input) + from_prev_hidden\n",
    "        new_hidden = self.activation.forward(combined)\n",
    "        output = self.w_ho.forward(new_hidden)\n",
    "        return output, new_hidden\n",
    "    \n",
    "    def init_hidden(self, batch_size=1):\n",
    "        return Tensor(np.zeros((batch_size,self.n_hidden)), autograd=True)"
   ]
  },
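  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A single forward step through the cell (a small sketch with arbitrary sizes, not from the text) makes the shapes concrete: the hidden state is what gets carried between time steps, while the output is the per-step prediction."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# one step with a made-up batch of 2 and 10 output classes\n",
    "cell = RNNCell(n_inputs=16, n_hidden=16, n_output=10)\n",
    "h = cell.init_hidden(batch_size=2)\n",
    "x_t = Tensor(np.random.randn(2, 16), autograd=True)\n",
    "out, h = cell.forward(input=x_t, hidden=h)\n",
    "print(out.data.shape, h.data.shape)  # (2, 10) (2, 16)"
   ]
  },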
  {
   "cell_type": "code",
   "execution_count": 401,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys,random,math\n",
    "from collections import Counter\n",
    "import numpy as np\n",
    "\n",
    "f = open('tasksv11/en/qa1_single-supporting-fact_train.txt','r')\n",
    "raw = f.readlines()\n",
    "f.close()\n",
    "\n",
    "tokens = list()\n",
    "for line in raw[0:1000]:\n",
    "    tokens.append(line.lower().replace(\"\\n\",\"\").split(\" \")[1:])\n",
    "\n",
    "new_tokens = list()\n",
    "for line in tokens:\n",
    "    new_tokens.append(['-'] * (6 - len(line)) + line)\n",
    "\n",
    "tokens = new_tokens\n",
    "\n",
    "vocab = set()\n",
    "for sent in tokens:\n",
    "    for word in sent:\n",
    "        vocab.add(word)\n",
    "\n",
    "vocab = list(vocab)\n",
    "\n",
    "word2index = {}\n",
    "for i,word in enumerate(vocab):\n",
    "    word2index[word]=i\n",
    "    \n",
    "def words2indices(sentence):\n",
    "    idx = list()\n",
    "    for word in sentence:\n",
    "        idx.append(word2index[word])\n",
    "    return idx\n",
    "\n",
    "indices = list()\n",
    "for line in tokens:\n",
    "    idx = list()\n",
    "    for w in line:\n",
    "        idx.append(word2index[w])\n",
    "    indices.append(idx)\n",
    "\n",
    "data = np.array(indices)"
   ]
  },
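  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick inspection of the preprocessed data (a small sketch): each line was left-padded with '-' tokens, so every row of `data` is a fixed-length vector of word indices."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# inspect one padded sentence and its index encoding\n",
    "print(tokens[0])\n",
    "print(data[0])\n",
    "print(data.shape)  # (num_sentences, padded_length)"
   ]
  },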
  {
   "cell_type": "code",
   "execution_count": 404,
   "metadata": {},
   "outputs": [],
   "source": [
    "embed = Embedding(vocab_size=len(vocab),dim=16)\n",
    "model = RNNCell(n_inputs=16, n_hidden=16, n_output=len(vocab))\n",
    "\n",
    "criterion = CrossEntropyLoss()\n",
    "optim = SGD(parameters=model.get_parameters() + embed.get_parameters(), alpha=0.05)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 405,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loss: 0.47631100976371393 % Correct: 0.01\n",
      "Loss: 0.17189538896184856 % Correct: 0.28\n",
      "Loss: 0.1460940222788725 % Correct: 0.37\n",
      "Loss: 0.13845863915406884 % Correct: 0.37\n",
      "Loss: 0.135574472565278 % Correct: 0.37\n"
     ]
    }
   ],
   "source": [
    "for iter in range(1000):\n",
    "    batch_size = 100\n",
    "    total_loss = 0\n",
    "    \n",
    "    hidden = model.init_hidden(batch_size=batch_size)\n",
    "\n",
    "    for t in range(5):\n",
    "        input = Tensor(data[0:batch_size,t], autograd=True)\n",
    "        rnn_input = embed.forward(input=input)\n",
    "        output, hidden = model.forward(input=rnn_input, hidden=hidden)\n",
    "\n",
    "    target = Tensor(data[0:batch_size,t+1], autograd=True)    \n",
    "    loss = criterion.forward(output, target)\n",
    "    loss.backward()\n",
    "    optim.step()\n",
    "    total_loss += loss.data\n",
    "    if(iter % 200 == 0):\n",
    "        p_correct = (target.data == np.argmax(output.data,axis=1)).mean()\n",
    "        print(\"Loss:\",total_loss / (len(data)/batch_size),\"% Correct:\",p_correct)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 406,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Context: - mary moved to the \n",
      "True: bathroom.\n",
      "Pred: office.\n"
     ]
    }
   ],
   "source": [
    "batch_size = 1\n",
    "hidden = model.init_hidden(batch_size=batch_size)\n",
    "for t in range(5):\n",
    "    input = Tensor(data[0:batch_size,t], autograd=True)\n",
    "    rnn_input = embed.forward(input=input)\n",
    "    output, hidden = model.forward(input=rnn_input, hidden=hidden)\n",
    "\n",
    "target = Tensor(data[0:batch_size,t+1], autograd=True)    \n",
    "loss = criterion.forward(output, target)\n",
    "\n",
    "ctx = \"\"\n",
    "for idx in data[0:batch_size][0][0:-1]:\n",
    "    ctx += vocab[idx] + \" \"\n",
    "print(\"Context:\",ctx)\n",
    "print(\"True:\",vocab[target.data[0]])\n",
    "print(\"Pred:\", vocab[output.data.argmax()])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
